{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2aa1c7d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd \n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "83cc9b91",
   "metadata": {},
   "source": [
    "# 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e9a3e549",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>...</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>601758</td>\n",
       "      <td>65532</td>\n",
       "      <td>78990</td>\n",
       "      <td>84.38</td>\n",
       "      <td>136</td>\n",
       "      <td>20490</td>\n",
       "      <td>45</td>\n",
       "      <td>1981</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>519488</td>\n",
       "      <td>56759</td>\n",
       "      <td>65325</td>\n",
       "      <td>89.55</td>\n",
       "      <td>61</td>\n",
       "      <td>22778</td>\n",
       "      <td>86</td>\n",
       "      <td>1967</td>\n",
       "      <td>2018</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>2054139</td>\n",
       "      <td>2036500</td>\n",
       "      <td>2036500</td>\n",
       "      <td>34455</td>\n",
       "      <td>0.99</td>\n",
       "      <td>59</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>447579</td>\n",
       "      <td>58413</td>\n",
       "      <td>67960</td>\n",
       "      <td>89.02</td>\n",
       "      <td>5</td>\n",
       "      <td>15663</td>\n",
       "      <td>86</td>\n",
       "      <td>1977</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>648134</td>\n",
       "      <td>72317</td>\n",
       "      <td>99750</td>\n",
       "      <td>73.68</td>\n",
       "      <td>76</td>\n",
       "      <td>17242</td>\n",
       "      <td>48</td>\n",
       "      <td>1995</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>13813</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>13814.00</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>458210</td>\n",
       "      <td>50078</td>\n",
       "      <td>65450</td>\n",
       "      <td>79.45</td>\n",
       "      <td>146</td>\n",
       "      <td>14181</td>\n",
       "      <td>45</td>\n",
       "      <td>1974</td>\n",
       "      <td>2018</td>\n",
       "      <td>17</td>\n",
       "      <td>...</td>\n",
       "      <td>467161</td>\n",
       "      <td>550000</td>\n",
       "      <td>550000</td>\n",
       "      <td>12863</td>\n",
       "      <td>1.18</td>\n",
       "      <td>42</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.06</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     客户编号   已发货款   资产成本  贷款与资产比列   品牌  骑车销售商  车厂  出生日期  货款日期  地区  ...  \\\n",
       "0  601758  65532  78990    84.38  136  20490  45  1981  2018   8  ...   \n",
       "1  519488  56759  65325    89.55   61  22778  86  1967  2018   6  ...   \n",
       "2  447579  58413  67960    89.02    5  15663  86  1977  2018   9  ...   \n",
       "3  648134  72317  99750    73.68   76  17242  48  1995  2018   8  ...   \n",
       "4  458210  50078  65450    79.45  146  14181  45  1974  2018  17  ...   \n",
       "\n",
       "   尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额  每月还款总额  贷款与已还贷款比列  主账户还款期数  次账户还款期数  \\\n",
       "0           0        0        0       0       1.00        0        0   \n",
       "1     2054139  2036500  2036500   34455       0.99       59        0   \n",
       "2           0        0        0       0       1.00        0        0   \n",
       "3           0    13813    13813       0   13814.00    13813        0   \n",
       "4      467161   550000   550000   12863       1.18       42        0   \n",
       "\n",
       "   贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  \n",
       "0         1.0            1.00     0  \n",
       "1         1.0            1.33     1  \n",
       "2         1.0            1.00     1  \n",
       "3         1.0            2.00     0  \n",
       "4         1.0            1.06     1  \n",
       "\n",
       "[5 rows x 49 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the raw loan table; the file is decoded as GB2312.\n",
    "# NOTE: DATA_PATH is an absolute, machine-specific location -- adjust it when\n",
    "# running this notebook anywhere else.\n",
    "DATA_PATH = 'C:/Users/Xl/Desktop/阶段九模块1（作业）/车贷违约预测.csv'\n",
    "Data = pd.read_csv(DATA_PATH, encoding='GB2312')\n",
    "Data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8fd893e5",
   "metadata": {},
   "source": [
    "# 异常值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d95d9989",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Columns held out of Data_1. The same list selects Data_2, so the two frames\n",
    "# together partition the columns of Data.\n",
    "# (Presumably IDs, categorical codes, yes/no flags and the label -- inferred\n",
    "# from the column names only; confirm against the data dictionary.)\n",
    "excluded_cols = [\n",
    "    '客户编号', '品牌', '骑车销售商', '车厂', '出生日期', '货款日期', '地区',\n",
    "    '对接员工编号', '是否填写手机号', '受否填写身份证', '是否出具驾驶证',\n",
    "    '是否填写护照', '是否违约', '工作类型',\n",
    "]\n",
    "Data_1 = Data.drop(columns=excluded_cols)\n",
    "Data_2 = Data[excluded_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f417dbbc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>...</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>601758</td>\n",
       "      <td>65532</td>\n",
       "      <td>78990</td>\n",
       "      <td>84.38</td>\n",
       "      <td>136</td>\n",
       "      <td>20490</td>\n",
       "      <td>45</td>\n",
       "      <td>1981</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>447579</td>\n",
       "      <td>58413</td>\n",
       "      <td>67960</td>\n",
       "      <td>89.02</td>\n",
       "      <td>5</td>\n",
       "      <td>15663</td>\n",
       "      <td>86</td>\n",
       "      <td>1977</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>623755</td>\n",
       "      <td>55299</td>\n",
       "      <td>68556</td>\n",
       "      <td>84.60</td>\n",
       "      <td>5</td>\n",
       "      <td>14347</td>\n",
       "      <td>86</td>\n",
       "      <td>1992</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>613879</td>\n",
       "      <td>34911</td>\n",
       "      <td>51648</td>\n",
       "      <td>72.80</td>\n",
       "      <td>78</td>\n",
       "      <td>17075</td>\n",
       "      <td>48</td>\n",
       "      <td>1989</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>469813</td>\n",
       "      <td>56013</td>\n",
       "      <td>67294</td>\n",
       "      <td>86.19</td>\n",
       "      <td>251</td>\n",
       "      <td>22969</td>\n",
       "      <td>45</td>\n",
       "      <td>1990</td>\n",
       "      <td>2018</td>\n",
       "      <td>13</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199701</th>\n",
       "      <td>476759</td>\n",
       "      <td>62063</td>\n",
       "      <td>70100</td>\n",
       "      <td>89.66</td>\n",
       "      <td>2</td>\n",
       "      <td>14834</td>\n",
       "      <td>86</td>\n",
       "      <td>1982</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199702</th>\n",
       "      <td>604679</td>\n",
       "      <td>53328</td>\n",
       "      <td>80900</td>\n",
       "      <td>67.99</td>\n",
       "      <td>8</td>\n",
       "      <td>23293</td>\n",
       "      <td>86</td>\n",
       "      <td>1991</td>\n",
       "      <td>2018</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199708</th>\n",
       "      <td>440510</td>\n",
       "      <td>60113</td>\n",
       "      <td>73691</td>\n",
       "      <td>84.00</td>\n",
       "      <td>162</td>\n",
       "      <td>17094</td>\n",
       "      <td>86</td>\n",
       "      <td>1993</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199713</th>\n",
       "      <td>598007</td>\n",
       "      <td>52303</td>\n",
       "      <td>72677</td>\n",
       "      <td>72.93</td>\n",
       "      <td>34</td>\n",
       "      <td>15142</td>\n",
       "      <td>86</td>\n",
       "      <td>1985</td>\n",
       "      <td>2018</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199715</th>\n",
       "      <td>634336</td>\n",
       "      <td>54509</td>\n",
       "      <td>71921</td>\n",
       "      <td>77.86</td>\n",
       "      <td>74</td>\n",
       "      <td>16846</td>\n",
       "      <td>45</td>\n",
       "      <td>1983</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>88518 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          客户编号   已发货款   资产成本  贷款与资产比列   品牌  骑车销售商  车厂  出生日期  货款日期  地区  ...  \\\n",
       "0       601758  65532  78990    84.38  136  20490  45  1981  2018   8  ...   \n",
       "2       447579  58413  67960    89.02    5  15663  86  1977  2018   9  ...   \n",
       "12      623755  55299  68556    84.60    5  14347  86  1992  2018   9  ...   \n",
       "14      613879  34911  51648    72.80   78  17075  48  1989  2018   4  ...   \n",
       "16      469813  56013  67294    86.19  251  22969  45  1990  2018  13  ...   \n",
       "...        ...    ...    ...      ...  ...    ...  ..   ...   ...  ..  ...   \n",
       "199701  476759  62063  70100    89.66    2  14834  86  1982  2018   4  ...   \n",
       "199702  604679  53328  80900    67.99    8  23293  86  1991  2018   3  ...   \n",
       "199708  440510  60113  73691    84.00  162  17094  86  1993  2018   4  ...   \n",
       "199713  598007  52303  72677    72.93   34  15142  86  1985  2018   6  ...   \n",
       "199715  634336  54509  71921    77.86   74  16846  45  1983  2018   4  ...   \n",
       "\n",
       "        尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额  每月还款总额  贷款与已还贷款比列  主账户还款期数  次账户还款期数  \\\n",
       "0                0        0        0       0        1.0        0        0   \n",
       "2                0        0        0       0        1.0        0        0   \n",
       "12               0        0        0       0        1.0        0        0   \n",
       "14               0        0        0       0        1.0        0        0   \n",
       "16               0        0        0       0        1.0        0        0   \n",
       "...            ...      ...      ...     ...        ...      ...      ...   \n",
       "199701           0        0        0       0        1.0        0        0   \n",
       "199702           0        0        0       0        1.0        0        0   \n",
       "199708           0        0        0       0        1.0        0        0   \n",
       "199713           0        0        0       0        1.0        0        0   \n",
       "199715           0        0        0       0        1.0        0        0   \n",
       "\n",
       "        贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  \n",
       "0              1.0             1.0     0  \n",
       "2              1.0             1.0     1  \n",
       "12             1.0             1.0     0  \n",
       "14             1.0             1.0     0  \n",
       "16             1.0             1.0     0  \n",
       "...            ...             ...   ...  \n",
       "199701         1.0             1.0     0  \n",
       "199702         1.0             1.0     1  \n",
       "199708         1.0             1.0     1  \n",
       "199713         1.0             1.0     0  \n",
       "199715         1.0             1.0     1  \n",
       "\n",
       "[88518 rows x 49 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flag outliers with the box-plot whisker rule, evaluated per column of Data_1:\n",
    "# a value is an outlier when it lies outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].\n",
    "# NOTE(review): for a column whose Q1 equals Q3 the whiskers collapse to a\n",
    "# single value, so every entry different from that value is flagged.\n",
    "Q1 = Data_1.quantile(0.25)\n",
    "Q3 = Data_1.quantile(0.75)\n",
    "low_whisker = Q1-1.5*(Q3-Q1)\n",
    "up_whisker = Q3+1.5*(Q3-Q1)\n",
    "\n",
    "# Row labels that have at least one column outside its whiskers.\n",
    "a = (Data_1<low_whisker) | (Data_1>up_whisker)\n",
    "data_index = Data_1.loc[a.any(axis=1)].index\n",
    "\n",
    "# The sample is large enough, so the flagged rows are simply dropped.\n",
    "# errors='ignore' keeps this cell re-runnable: Data is overwritten here while\n",
    "# data_index still holds the original labels, so on a second run those labels\n",
    "# are already gone and the default errors='raise' would throw KeyError.\n",
    "Data = Data.drop(data_index,axis=0,errors='ignore')\n",
    "Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "13854be7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>客户编号</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>534772.840258</td>\n",
       "      <td>68524.016992</td>\n",
       "      <td>417430.000000</td>\n",
       "      <td>474779.250000</td>\n",
       "      <td>534126.000000</td>\n",
       "      <td>594690.750000</td>\n",
       "      <td>658676.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已发货款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>52315.492081</td>\n",
       "      <td>8864.325994</td>\n",
       "      <td>27129.000000</td>\n",
       "      <td>46369.000000</td>\n",
       "      <td>52579.000000</td>\n",
       "      <td>58208.750000</td>\n",
       "      <td>80104.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>资产成本</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>71654.095743</td>\n",
       "      <td>9710.082172</td>\n",
       "      <td>45550.000000</td>\n",
       "      <td>65467.000000</td>\n",
       "      <td>70161.500000</td>\n",
       "      <td>76438.500000</td>\n",
       "      <td>99326.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>75.606268</td>\n",
       "      <td>9.790173</td>\n",
       "      <td>46.450000</td>\n",
       "      <td>69.800000</td>\n",
       "      <td>77.270000</td>\n",
       "      <td>83.420000</td>\n",
       "      <td>95.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>品牌</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>73.496419</td>\n",
       "      <td>70.764741</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>61.000000</td>\n",
       "      <td>120.000000</td>\n",
       "      <td>261.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>骑车销售商</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>19748.595461</td>\n",
       "      <td>3424.246098</td>\n",
       "      <td>10524.000000</td>\n",
       "      <td>16639.000000</td>\n",
       "      <td>20512.000000</td>\n",
       "      <td>22998.000000</td>\n",
       "      <td>24793.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>车厂</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>68.717933</td>\n",
       "      <td>21.851862</td>\n",
       "      <td>45.000000</td>\n",
       "      <td>45.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>145.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>出生日期</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1985.099505</td>\n",
       "      <td>9.733160</td>\n",
       "      <td>1954.000000</td>\n",
       "      <td>1979.000000</td>\n",
       "      <td>1987.000000</td>\n",
       "      <td>1993.000000</td>\n",
       "      <td>2000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>货款日期</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>地区</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>7.466075</td>\n",
       "      <td>4.626680</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>22.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>对接员工编号</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1540.475497</td>\n",
       "      <td>974.720958</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>700.000000</td>\n",
       "      <td>1449.000000</td>\n",
       "      <td>2353.000000</td>\n",
       "      <td>3794.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写手机号</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>受否填写身份证</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.021171</td>\n",
       "      <td>0.143954</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写护照</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.001672</td>\n",
       "      <td>0.040856</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>信用评分</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>65.701609</td>\n",
       "      <td>216.027685</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>879.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.133690</td>\n",
       "      <td>0.419078</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.011534</td>\n",
       "      <td>0.112044</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>302.700897</td>\n",
       "      <td>3627.390324</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>89614.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>385.999345</td>\n",
       "      <td>4645.969421</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>140000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>385.998565</td>\n",
       "      <td>4645.966136</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>140000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户每月还款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>108.924388</td>\n",
       "      <td>497.942733</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户没用还款</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>近六个月违约次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>平均贷款期限</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.296041</td>\n",
       "      <td>4.672804</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>32.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.419960</td>\n",
       "      <td>5.164613</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>50.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款查询次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否违约</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.185894</td>\n",
       "      <td>0.389024</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.732626</td>\n",
       "      <td>0.097258</td>\n",
       "      <td>0.446871</td>\n",
       "      <td>0.674215</td>\n",
       "      <td>0.747593</td>\n",
       "      <td>0.808117</td>\n",
       "      <td>0.937987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款总次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.133690</td>\n",
       "      <td>0.419078</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.122156</td>\n",
       "      <td>0.390878</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.122156</td>\n",
       "      <td>0.390878</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>302.700897</td>\n",
       "      <td>3627.390324</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>89614.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>385.999345</td>\n",
       "      <td>4645.969421</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>140000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>385.998565</td>\n",
       "      <td>4645.966136</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>140000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>每月还款总额</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>108.924388</td>\n",
       "      <td>497.942733</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.002572</td>\n",
       "      <td>0.033626</td>\n",
       "      <td>0.680000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.650000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户还款期数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.198592</td>\n",
       "      <td>2.329976</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>62.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户还款期数</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>1.008763</td>\n",
       "      <td>0.087798</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>工作类型</th>\n",
       "      <td>88518.0</td>\n",
       "      <td>0.499288</td>\n",
       "      <td>0.571942</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  count           mean           std            min  \\\n",
       "客户编号            88518.0  534772.840258  68524.016992  417430.000000   \n",
       "已发货款            88518.0   52315.492081   8864.325994   27129.000000   \n",
       "资产成本            88518.0   71654.095743   9710.082172   45550.000000   \n",
       "贷款与资产比列         88518.0      75.606268      9.790173      46.450000   \n",
       "品牌              88518.0      73.496419     70.764741       1.000000   \n",
       "骑车销售商           88518.0   19748.595461   3424.246098   10524.000000   \n",
       "车厂              88518.0      68.717933     21.851862      45.000000   \n",
       "出生日期            88518.0    1985.099505      9.733160    1954.000000   \n",
       "货款日期            88518.0    2018.000000      0.000000    2018.000000   \n",
       "地区              88518.0       7.466075      4.626680       1.000000   \n",
       "对接员工编号          88518.0    1540.475497    974.720958       1.000000   \n",
       "是否填写手机号         88518.0       1.000000      0.000000       1.000000   \n",
       "受否填写身份证         88518.0       1.000000      0.000000       1.000000   \n",
       "是否出具驾驶证         88518.0       0.021171      0.143954       0.000000   \n",
       "是否填写护照          88518.0       0.001672      0.040856       0.000000   \n",
       "信用评分            88518.0      65.701609    216.027685       0.000000   \n",
       "主账户贷款次数         88518.0       0.133690      0.419078       0.000000   \n",
       "主账户有效贷款次数       88518.0       0.011534      0.112044       0.000000   \n",
       "主账户中尚未还清有效贷款    88518.0     302.700897   3627.390324       0.000000   \n",
       "主账户中已批准的贷款      88518.0     385.999345   4645.969421       0.000000   \n",
       "主账户中已发放贷款       88518.0     385.998565   4645.966136       0.000000   \n",
       "次账户贷款次数         88518.0       0.000000      0.000000       0.000000   \n",
       "次账户有效贷款次数       88518.0       0.000000      0.000000       0.000000   \n",
       "次账户中尚未还清有效贷款    88518.0       0.000000      0.000000       0.000000   \n",
       "次账户中已批准贷款       88518.0       0.000000      0.000000       0.000000   \n",
       "次账户中已发放贷款       88518.0       0.000000      0.000000       0.000000   \n",
       "主账户每月还款         88518.0     108.924388    497.942733       0.000000   \n",
       "次账户没用还款         88518.0       0.000000      0.000000       0.000000   \n",
       "近六个月新贷款次数       88518.0       0.000000      0.000000       0.000000   \n",
       "近六个月违约次数        88518.0       0.000000      0.000000       0.000000   \n",
       "平均贷款期限          88518.0       1.296041      4.672804       0.000000   \n",
       "第一次贷款距今时间       88518.0       1.419960      5.164613       0.000000   \n",
       "贷款查询次数          88518.0       0.000000      0.000000       0.000000   \n",
       "是否违约            88518.0       0.185894      0.389024       0.000000   \n",
       "贷款与资产比          88518.0       0.732626      0.097258       0.446871   \n",
       "贷款总次数           88518.0       0.133690      0.419078       0.000000   \n",
       "主账户无效贷款次数       88518.0       0.122156      0.390878       0.000000   \n",
       "次账户无效贷款次数       88518.0       0.000000      0.000000       0.000000   \n",
       "无效贷款总次数         88518.0       0.122156      0.390878       0.000000   \n",
       "尚未还清有效贷款总额      88518.0     302.700897   3627.390324       0.000000   \n",
       "已批准贷款总额         88518.0     385.999345   4645.969421       0.000000   \n",
       "已发放贷款总额         88518.0     385.998565   4645.966136       0.000000   \n",
       "每月还款总额          88518.0     108.924388    497.942733       0.000000   \n",
       "贷款与已还贷款比列       88518.0       1.002572      0.033626       0.680000   \n",
       "主账户还款期数         88518.0       0.198592      2.329976       0.000000   \n",
       "次账户还款期数         88518.0       0.000000      0.000000       0.000000   \n",
       "贷款与已批准贷款比列      88518.0       1.000000      0.000000       1.000000   \n",
       "总贷款次数与总有效贷款次数比  88518.0       1.008763      0.087798       1.000000   \n",
       "工作类型            88518.0       0.499288      0.571942       0.000000   \n",
       "\n",
       "                          25%            50%            75%            max  \n",
       "客户编号            474779.250000  534126.000000  594690.750000  658676.000000  \n",
       "已发货款             46369.000000   52579.000000   58208.750000   80104.000000  \n",
       "资产成本             65467.000000   70161.500000   76438.500000   99326.000000  \n",
       "贷款与资产比列             69.800000      77.270000      83.420000      95.000000  \n",
       "品牌                  15.000000      61.000000     120.000000     261.000000  \n",
       "骑车销售商            16639.000000   20512.000000   22998.000000   24793.000000  \n",
       "车厂                  45.000000      86.000000      86.000000     145.000000  \n",
       "出生日期              1979.000000    1987.000000    1993.000000    2000.000000  \n",
       "货款日期              2018.000000    2018.000000    2018.000000    2018.000000  \n",
       "地区                   4.000000       6.000000      11.000000      22.000000  \n",
       "对接员工编号             700.000000    1449.000000    2353.000000    3794.000000  \n",
       "是否填写手机号              1.000000       1.000000       1.000000       1.000000  \n",
       "受否填写身份证              1.000000       1.000000       1.000000       1.000000  \n",
       "是否出具驾驶证              0.000000       0.000000       0.000000       1.000000  \n",
       "是否填写护照               0.000000       0.000000       0.000000       1.000000  \n",
       "信用评分                 0.000000       0.000000       0.000000     879.000000  \n",
       "主账户贷款次数              0.000000       0.000000       0.000000       4.000000  \n",
       "主账户有效贷款次数            0.000000       0.000000       0.000000       2.000000  \n",
       "主账户中尚未还清有效贷款         0.000000       0.000000       0.000000   89614.000000  \n",
       "主账户中已批准的贷款           0.000000       0.000000       0.000000  140000.000000  \n",
       "主账户中已发放贷款            0.000000       0.000000       0.000000  140000.000000  \n",
       "次账户贷款次数              0.000000       0.000000       0.000000       0.000000  \n",
       "次账户有效贷款次数            0.000000       0.000000       0.000000       0.000000  \n",
       "次账户中尚未还清有效贷款         0.000000       0.000000       0.000000       0.000000  \n",
       "次账户中已批准贷款            0.000000       0.000000       0.000000       0.000000  \n",
       "次账户中已发放贷款            0.000000       0.000000       0.000000       0.000000  \n",
       "主账户每月还款              0.000000       0.000000       0.000000    5000.000000  \n",
       "次账户没用还款              0.000000       0.000000       0.000000       0.000000  \n",
       "近六个月新贷款次数            0.000000       0.000000       0.000000       0.000000  \n",
       "近六个月违约次数             0.000000       0.000000       0.000000       0.000000  \n",
       "平均贷款期限               0.000000       0.000000       0.000000      32.000000  \n",
       "第一次贷款距今时间            0.000000       0.000000       0.000000      50.000000  \n",
       "贷款查询次数               0.000000       0.000000       0.000000       0.000000  \n",
       "是否违约                 0.000000       0.000000       0.000000       1.000000  \n",
       "贷款与资产比               0.674215       0.747593       0.808117       0.937987  \n",
       "贷款总次数                0.000000       0.000000       0.000000       4.000000  \n",
       "主账户无效贷款次数            0.000000       0.000000       0.000000       2.000000  \n",
       "次账户无效贷款次数            0.000000       0.000000       0.000000       0.000000  \n",
       "无效贷款总次数              0.000000       0.000000       0.000000       2.000000  \n",
       "尚未还清有效贷款总额           0.000000       0.000000       0.000000   89614.000000  \n",
       "已批准贷款总额              0.000000       0.000000       0.000000  140000.000000  \n",
       "已发放贷款总额              0.000000       0.000000       0.000000  140000.000000  \n",
       "每月还款总额               0.000000       0.000000       0.000000    5000.000000  \n",
       "贷款与已还贷款比列            1.000000       1.000000       1.000000       1.650000  \n",
       "主账户还款期数              0.000000       0.000000       0.000000      62.000000  \n",
       "次账户还款期数              0.000000       0.000000       0.000000       0.000000  \n",
       "贷款与已批准贷款比列           1.000000       1.000000       1.000000       1.000000  \n",
       "总贷款次数与总有效贷款次数比       1.000000       1.000000       1.000000       2.000000  \n",
       "工作类型                 0.000000       0.000000       1.000000       2.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Transposed summary statistics: each described column of Data becomes one row.\n",
    "Data.describe().transpose()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d514f71",
   "metadata": {},
   "source": [
    "# 缺失值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "158a307c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "客户编号              0\n",
       "已发货款              0\n",
       "资产成本              0\n",
       "贷款与资产比列           0\n",
       "品牌                0\n",
       "骑车销售商             0\n",
       "车厂                0\n",
       "出生日期              0\n",
       "货款日期              0\n",
       "地区                0\n",
       "对接员工编号            0\n",
       "是否填写手机号           0\n",
       "受否填写身份证           0\n",
       "是否出具驾驶证           0\n",
       "是否填写护照            0\n",
       "信用评分              0\n",
       "主账户贷款次数           0\n",
       "主账户有效贷款次数         0\n",
       "主账户中尚未还清有效贷款      0\n",
       "主账户中已批准的贷款        0\n",
       "主账户中已发放贷款         0\n",
       "次账户贷款次数           0\n",
       "次账户有效贷款次数         0\n",
       "次账户中尚未还清有效贷款      0\n",
       "次账户中已批准贷款         0\n",
       "次账户中已发放贷款         0\n",
       "主账户每月还款           0\n",
       "次账户没用还款           0\n",
       "近六个月新贷款次数         0\n",
       "近六个月违约次数          0\n",
       "平均贷款期限            0\n",
       "第一次贷款距今时间         0\n",
       "贷款查询次数            0\n",
       "是否违约              0\n",
       "贷款与资产比            0\n",
       "贷款总次数             0\n",
       "主账户无效贷款次数         0\n",
       "次账户无效贷款次数         0\n",
       "无效贷款总次数           0\n",
       "尚未还清有效贷款总额        0\n",
       "已批准贷款总额           0\n",
       "已发放贷款总额           0\n",
       "每月还款总额            0\n",
       "贷款与已还贷款比列         0\n",
       "主账户还款期数           0\n",
       "次账户还款期数           0\n",
       "贷款与已批准贷款比列        0\n",
       "总贷款次数与总有效贷款次数比    0\n",
       "工作类型              0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count missing values per column; every column reports 0, so there are no missing values.\n",
    "Data.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "72cd27f0",
   "metadata": {},
   "source": [
    "# 衍生字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "97cf56b3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>...</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>C1</th>\n",
       "      <th>C2</th>\n",
       "      <th>C3</th>\n",
       "      <th>C4</th>\n",
       "      <th>C5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>601758</td>\n",
       "      <td>65532</td>\n",
       "      <td>78990</td>\n",
       "      <td>84.38</td>\n",
       "      <td>136</td>\n",
       "      <td>20490</td>\n",
       "      <td>45</td>\n",
       "      <td>1981</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>447579</td>\n",
       "      <td>58413</td>\n",
       "      <td>67960</td>\n",
       "      <td>89.02</td>\n",
       "      <td>5</td>\n",
       "      <td>15663</td>\n",
       "      <td>86</td>\n",
       "      <td>1977</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>623755</td>\n",
       "      <td>55299</td>\n",
       "      <td>68556</td>\n",
       "      <td>84.60</td>\n",
       "      <td>5</td>\n",
       "      <td>14347</td>\n",
       "      <td>86</td>\n",
       "      <td>1992</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>613879</td>\n",
       "      <td>34911</td>\n",
       "      <td>51648</td>\n",
       "      <td>72.80</td>\n",
       "      <td>78</td>\n",
       "      <td>17075</td>\n",
       "      <td>48</td>\n",
       "      <td>1989</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>469813</td>\n",
       "      <td>56013</td>\n",
       "      <td>67294</td>\n",
       "      <td>86.19</td>\n",
       "      <td>251</td>\n",
       "      <td>22969</td>\n",
       "      <td>45</td>\n",
       "      <td>1990</td>\n",
       "      <td>2018</td>\n",
       "      <td>13</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199701</th>\n",
       "      <td>476759</td>\n",
       "      <td>62063</td>\n",
       "      <td>70100</td>\n",
       "      <td>89.66</td>\n",
       "      <td>2</td>\n",
       "      <td>14834</td>\n",
       "      <td>86</td>\n",
       "      <td>1982</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199702</th>\n",
       "      <td>604679</td>\n",
       "      <td>53328</td>\n",
       "      <td>80900</td>\n",
       "      <td>67.99</td>\n",
       "      <td>8</td>\n",
       "      <td>23293</td>\n",
       "      <td>86</td>\n",
       "      <td>1991</td>\n",
       "      <td>2018</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199708</th>\n",
       "      <td>440510</td>\n",
       "      <td>60113</td>\n",
       "      <td>73691</td>\n",
       "      <td>84.00</td>\n",
       "      <td>162</td>\n",
       "      <td>17094</td>\n",
       "      <td>86</td>\n",
       "      <td>1993</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199713</th>\n",
       "      <td>598007</td>\n",
       "      <td>52303</td>\n",
       "      <td>72677</td>\n",
       "      <td>72.93</td>\n",
       "      <td>34</td>\n",
       "      <td>15142</td>\n",
       "      <td>86</td>\n",
       "      <td>1985</td>\n",
       "      <td>2018</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199715</th>\n",
       "      <td>634336</td>\n",
       "      <td>54509</td>\n",
       "      <td>71921</td>\n",
       "      <td>77.86</td>\n",
       "      <td>74</td>\n",
       "      <td>16846</td>\n",
       "      <td>45</td>\n",
       "      <td>1983</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>88518 rows × 54 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          客户编号   已发货款   资产成本  贷款与资产比列   品牌  骑车销售商  车厂  出生日期  货款日期  地区  ...  \\\n",
       "0       601758  65532  78990    84.38  136  20490  45  1981  2018   8  ...   \n",
       "2       447579  58413  67960    89.02    5  15663  86  1977  2018   9  ...   \n",
       "12      623755  55299  68556    84.60    5  14347  86  1992  2018   9  ...   \n",
       "14      613879  34911  51648    72.80   78  17075  48  1989  2018   4  ...   \n",
       "16      469813  56013  67294    86.19  251  22969  45  1990  2018  13  ...   \n",
       "...        ...    ...    ...      ...  ...    ...  ..   ...   ...  ..  ...   \n",
       "199701  476759  62063  70100    89.66    2  14834  86  1982  2018   4  ...   \n",
       "199702  604679  53328  80900    67.99    8  23293  86  1991  2018   3  ...   \n",
       "199708  440510  60113  73691    84.00  162  17094  86  1993  2018   4  ...   \n",
       "199713  598007  52303  72677    72.93   34  15142  86  1985  2018   6  ...   \n",
       "199715  634336  54509  71921    77.86   74  16846  45  1983  2018   4  ...   \n",
       "\n",
       "        主账户还款期数  次账户还款期数  贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  C1  C2  C3  C4  C5  \n",
       "0             0        0         1.0             1.0     0   0   0   0   1   0  \n",
       "2             0        0         1.0             1.0     1   0   0   0   1   0  \n",
       "12            0        0         1.0             1.0     0   0   0   0   1   0  \n",
       "14            0        0         1.0             1.0     0   0   0   0   1   0  \n",
       "16            0        0         1.0             1.0     0   0   0   0   1   0  \n",
       "...         ...      ...         ...             ...   ...  ..  ..  ..  ..  ..  \n",
       "199701        0        0         1.0             1.0     0   0   0   0   1   0  \n",
       "199702        0        0         1.0             1.0     1   0   0   0   1   0  \n",
       "199708        0        0         1.0             1.0     1   0   0   0   1   0  \n",
       "199713        0        0         1.0             1.0     0   0   0   0   1   0  \n",
       "199715        0        0         1.0             1.0     1   0   0   0   1   0  \n",
       "\n",
       "[88518 rows x 54 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Derived features C1-C5. Source column names are used exactly as they are spelled in Data.\n",
    "\n",
    "# C1: repayment periods of the primary and secondary accounts added together.\n",
    "C1 = Data['主账户还款期数'] + Data['次账户还款期数']\n",
    "Data['C1'] = C1\n",
    "\n",
    "# C2: approved loan total minus disbursed loan total (amount approved but not disbursed).\n",
    "C2 = Data['已批准贷款总额'] - Data['已发放贷款总额']\n",
    "Data['C2'] = C2\n",
    "\n",
    "# C3: total monthly repayment = primary monthly repayment + the '次账户没用还款' column.\n",
    "# NOTE(review): that column is presumably the secondary account's monthly repayment and its\n",
    "# name looks misspelled; Data also already has a '每月还款总额' column - confirm C3 is not a duplicate.\n",
    "C3 = Data['主账户每月还款'] + Data['次账户没用还款']\n",
    "Data['C3'] = C3\n",
    "\n",
    "# C4: product of the phone-number and ID-card flags, i.e. 1 only when both are 1\n",
    "# (assumes both columns are 0/1 indicators - TODO confirm).\n",
    "C4 = Data['是否填写手机号'] * Data['受否填写身份证']\n",
    "Data['C4'] = C4\n",
    "\n",
    "# C5: active loan counts of the primary and secondary accounts added together.\n",
    "# This has to be a sum (`+`): a product would be 0 whenever either count is 0.\n",
    "C5 = Data['主账户有效贷款次数'] + Data['次账户有效贷款次数']\n",
    "Data['C5'] = C5\n",
    "\n",
    "Data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18e14929",
   "metadata": {},
   "source": [
    "# 失衡数据判断并处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8157680c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.18589439436046906"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label balance check: share of rows with 是否违约 == 1 (the minority class), about 18.6%.\n",
    "# That is above the 10% threshold used here, so the data is not treated as imbalanced.\n",
    "Data['是否违约'].value_counts().loc[1] / len(Data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab8dd6b0",
   "metadata": {},
   "source": [
    "# 多个备选模型比较"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "eef7e142",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "# 模型处理模块\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "# 标准化处理模块\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "# 常规模型\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "# 集成学习和stacking模型\n",
    "from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n",
    "import xgboost as xgb\n",
    "from xgboost.sklearn import XGBClassifier\n",
    "from mlxtend.classifier import StackingClassifier\n",
    "# 评价标准模块\n",
    "from sklearn import metrics\n",
    "from sklearn.metrics import accuracy_score,roc_auc_score,recall_score,precision_score\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "147ef276",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the data: the default flag is the target, every other column is a feature;\n",
    "# 30% of the rows are held out for testing with a fixed seed.\n",
    "y = Data['是否违约']\n",
    "X = Data.drop(columns='是否违约')\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=6\n",
    ")\n",
    "\n",
    "# Model comparison helper\n",
    "def train_model(X_train, y_train, X_test, y_test,\n",
    "               model,model_name):\n",
    "    \"\"\"Fit ``model`` on the training split and evaluate it on the test split.\n",
    "\n",
    "    Prints the training score, the test score / precision / recall and the\n",
    "    time spent fitting.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X_train, X_test : feature tables passed to ``fit`` / ``predict`` / ``score``.\n",
    "    y_train, y_test : label columns; ``y_train`` must expose ``.values``\n",
    "        (e.g. a pandas Series).\n",
    "    model : estimator offering ``fit``, ``predict`` and ``score``.\n",
    "    model_name : label used in the progress message.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        ``(fitted model, test score, test precision, test recall,\n",
    "        fit time in seconds)``.\n",
    "    \"\"\"\n",
    "    print('训练{}'.format(model_name))\n",
    "\n",
    "    clf = model\n",
    "    start = time.perf_counter()\n",
    "    clf.fit(X_train, y_train.values.ravel())\n",
    "    # Stop the clock right after fitting so the reported training time does\n",
    "    # not also include the scoring and prediction work below.\n",
    "    duration = time.perf_counter() - start\n",
    "\n",
    "    # Evaluate the model\n",
    "    print('训练准确率：{:.4f}'.format(clf.score(X_train, y_train)))\n",
    "\n",
    "    predict = clf.predict(X_test)\n",
    "    score = clf.score(X_test, y_test)\n",
    "    precision = precision_score(y_test, predict)\n",
    "    recall = recall_score(y_test, predict)\n",
    "    print('测试准确率：{:.4f}'.format(score))\n",
    "    print('测试精确率：{:.4f}'.format(precision))\n",
    "    print('测试召回率：{:.4f}'.format(recall))\n",
    "\n",
    "    print('模型训练耗时：{:6f}s'.format(duration))\n",
    "\n",
    "    return clf, score, precision, recall, duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "635bec55",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练LR\n",
      "训练准确率：0.8130\n",
      "测试准确率：0.8165\n",
      "测试精确率：0.0000\n",
      "测试召回率：0.0000\n",
      "模型训练耗时：1.749036s\n",
      "训练DT\n",
      "训练准确率：0.8240\n",
      "测试准确率：0.8047\n",
      "测试精确率：0.2581\n",
      "测试召回率：0.0343\n",
      "模型训练耗时：0.885118s\n",
      "训练AdaBoost\n",
      "训练准确率：0.8131\n",
      "测试准确率：0.8164\n",
      "测试精确率：0.2857\n",
      "测试召回率：0.0004\n",
      "模型训练耗时：8.557001s\n",
      "训练GBDT\n",
      "训练准确率：0.8135\n",
      "测试准确率：0.8163\n",
      "测试精确率：0.2222\n",
      "测试召回率：0.0004\n",
      "模型训练耗时：25.054864s\n",
      "训练RF\n",
      "训练准确率：1.0000\n",
      "测试准确率：0.8138\n",
      "测试精确率：0.3856\n",
      "测试召回率：0.0252\n",
      "模型训练耗时：23.343601s\n",
      "训练XGBoost\n",
      "[00:29:37] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.5.1/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "训练准确率：0.8298\n",
      "测试准确率：0.8137\n",
      "测试精确率：0.3846\n",
      "测试召回率：0.0257\n",
      "模型训练耗时：9.213947s\n"
     ]
    }
   ],
   "source": [
    "# Seed for the estimators that accept one, so repeated runs are comparable\n",
    "RANDOM_STATE = 6\n",
    "\n",
    "# Candidate models keyed by the short name used in the log and the result table\n",
    "model_name_param_dict = {\n",
    "    'LR': LogisticRegression(penalty='l2'),\n",
    "    'DT': DecisionTreeClassifier(max_depth=10, min_samples_split=10,\n",
    "                                 random_state=RANDOM_STATE),\n",
    "    'AdaBoost': AdaBoostClassifier(random_state=RANDOM_STATE),\n",
    "    'GBDT': GradientBoostingClassifier(random_state=RANDOM_STATE),\n",
    "    'RF': RandomForestClassifier(random_state=RANDOM_STATE),\n",
    "    'XGBoost': XGBClassifier(),\n",
    "}\n",
    "\n",
    "# One row per model; filled in by the loop below\n",
    "result_df = pd.DataFrame(columns=['Accuracy (%)','precision(%)','recall(%)','Time (s)'],\n",
    "                         index=list(model_name_param_dict.keys()))\n",
    "\n",
    "for model_name, model in model_name_param_dict.items():\n",
    "    clf, acc, pre, recall, duration = train_model(X_train, y_train,\n",
    "                                                  X_test, y_test,\n",
    "                                                  model, model_name)\n",
    "    # train_model returns fractions; the columns are labelled in percent,\n",
    "    # so scale the three quality metrics before storing them.\n",
    "    result_df.loc[model_name, 'Accuracy (%)'] = acc * 100\n",
    "    result_df.loc[model_name, 'precision(%)'] = pre * 100\n",
    "    result_df.loc[model_name, 'recall(%)'] = recall * 100\n",
    "    result_df.loc[model_name, 'Time (s)'] = duration\n",
    "\n",
    "# The frame was created empty (object dtype); make the columns numeric\n",
    "result_df = result_df.astype(float)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56e77549",
   "metadata": {},
   "source": [
    "# 网格搜索调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "04831c37",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': 8,\n",
       " 'max_features': 50,\n",
       " 'min_samples_leaf': 30,\n",
       " 'min_samples_split': 40,\n",
       " 'n_estimators': 20}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV,KFold, cross_val_score\n",
    "\n",
    "# Full search space, kept for reference:\n",
    "#param_grid = {'n_estimators': [20, 50, 100,300], 'max_features': [10,20,30,40,50],\"max_depth\":[4,6,8,10,12],\n",
    "#             \"min_samples_split\": [10,20,30,40],\"min_samples_leaf\": [5,10,20,30]}\n",
    "\n",
    "# Reduced grid with fewer options so the cell runs through.\n",
    "# It must be a plain dict (or a list of dicts): a trailing comma after the\n",
    "# closing brace would wrap it in a tuple, which newer scikit-learn rejects.\n",
    "param_grid = {\n",
    "    'n_estimators': [20],\n",
    "    'max_features': [50],\n",
    "    'max_depth': [6, 8],\n",
    "    'min_samples_split': [40],\n",
    "    'min_samples_leaf': [30],\n",
    "}\n",
    "\n",
    "# Fixed seed so repeated runs build the same forests\n",
    "model = RandomForestClassifier(random_state=6)\n",
    "grid_search = GridSearchCV(model, param_grid, cv=5, scoring='roc_auc')\n",
    "# fit() returns the search object itself, so `result` is the fitted `grid_search`\n",
    "result = grid_search.fit(X_train, y_train)\n",
    "result.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "4932d040",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最优模型对象 RandomForestClassifier(max_depth=8, max_features=50, min_samples_leaf=30,\n",
      "                       min_samples_split=40, n_estimators=20)\n",
      "最优模型分数 0.6213126355051618\n",
      "最优模型参数 {'max_depth': 8, 'max_features': 50, 'min_samples_leaf': 30, 'min_samples_split': 40, 'n_estimators': 20}\n"
     ]
    }
   ],
   "source": [
    "# Report the refit best estimator, its cross-validated score and its parameters\n",
    "for label, value in (\n",
    "    ('最优模型对象', result.best_estimator_),\n",
    "    ('最优模型分数', result.best_score_),\n",
    "    ('最优模型参数', result.best_params_),\n",
    "):\n",
    "    print(label, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "f7c9879e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.6818181818181818 0.00615637184485943\n"
     ]
    }
   ],
   "source": [
    "# Precision and recall on the test split. With the limited parameter options\n",
    "# searched above, the tuned model is not better than the default one.\n",
    "pre = result.predict(X_test)\n",
    "test_precision = precision_score(y_test, pre)\n",
    "test_recall = recall_score(y_test, pre)\n",
    "print(test_precision, test_recall)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "185fbab4",
   "metadata": {},
   "source": [
    "# 优质模型保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c55e3cfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the selected model\n",
    "# sklearn.externals.joblib was removed from scikit-learn; import joblib directly\n",
    "import joblib\n",
    "\n",
    "# Persist the refit best estimator found by the grid search.\n",
    "# NOTE(review): this cell previously dumped `temp`, which no cell from the\n",
    "# feature-engineering step onward defines - confirm the tuned forest is the\n",
    "# model that should be kept.\n",
    "joblib.dump(result.best_estimator_, 'model.model')\n",
    "\n",
    "# Load the model back (only load files that come from a trusted source)\n",
    "#clf=joblib.load('model.model')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
